import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn import metrics
import plotly.express as px
from warnings import filterwarnings
filterwarnings("ignore")
D:\anaconda files\lib\site-packages\scipy\__init__.py:155: UserWarning: A NumPy version >=1.18.5 and <1.25.0 is required for this version of SciPy (detected version 1.26.4
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
data=pd.read_csv("C:\\Users\\laxma\\Downloads\\road_accident_data_by_vehicle_type.csv")
data
| Location | Motor Car | Dual Purpose Vehicle | Lorry | Cycle | Motor Cycle/Moped | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Ampara | 59 | 102 | 121 | 93 | 575 | 169 | 4 | 15 | 35 | 1 | 41 | 4 | 0 |
| 1 | Anuradhapura | 116 | 224 | 246 | 158 | 845 | 196 | 2 | 28 | 104 | 3 | 72 | 0 | 6 |
| 2 | Badulla (Badulla & Bandarawela Divisions) | 82 | 121 | 171 | 20 | 266 | 219 | 2 | 58 | 78 | 1 | 16 | 0 | 5 |
| 3 | Batticaloa | 26 | 43 | 30 | 64 | 224 | 52 | 4 | 9 | 25 | 1 | 24 | 1 | 2 |
| 4 | Chilaw (Chilaw & Puttalam Divisions) | 102 | 213 | 224 | 134 | 530 | 163 | 14 | 23 | 84 | 1 | 19 | 1 | 3 |
| 5 | Colombo (Colombo-South, North, Central, Mt. La... | 4708 | 2675 | 1686 | 221 | 2835 | 2804 | 110 | 190 | 1194 | 4 | 59 | 0 | 63 |
| 6 | Galle (Galle & Elpitiya Divisions) | 488 | 377 | 348 | 216 | 1346 | 586 | 16 | 73 | 224 | 3 | 16 | 0 | 9 |
| 7 | Gampaha (Gampaha, Negombo & Kelaniya Divisions) | 2018 | 1688 | 1352 | 317 | 2598 | 1466 | 175 | 123 | 590 | 9 | 38 | 4 | 6 |
| 8 | Jaffna (Jaffna & KKS Divisions) | 32 | 64 | 46 | 73 | 213 | 63 | 1 | 8 | 21 | 0 | 28 | 0 | 0 |
| 9 | Kalutara (Kalutara & Panadura Divisions) | 381 | 398 | 337 | 226 | 1022 | 525 | 15 | 53 | 238 | 4 | 15 | 0 | 6 |
| 10 | Kandy (Kandy & Gampola Divisions) | 735 | 637 | 526 | 29 | 746 | 895 | 4 | 148 | 403 | 4 | 10 | 1 | 18 |
| 11 | Kegalle (Kegalle & Seethawaka Divisions) | 281 | 323 | 357 | 23 | 500 | 545 | 11 | 82 | 224 | 1 | 15 | 0 | 12 |
| 12 | Kilinochchi (Kilinochchi & Mankulam Divisions) | 5 | 74 | 93 | 30 | 120 | 15 | 3 | 21 | 42 | 0 | 10 | 0 | 1 |
| 13 | Kurunegala (Kurunegala, Kuliyapitiya & Nikawer... | 333 | 428 | 522 | 193 | 1349 | 433 | 16 | 97 | 218 | 4 | 35 | 0 | 17 |
| 14 | Mannar | 12 | 35 | 26 | 18 | 71 | 35 | 0 | 1 | 8 | 0 | 7 | 0 | 0 |
| 15 | Matale | 118 | 192 | 246 | 68 | 384 | 306 | 8 | 50 | 110 | 3 | 15 | 0 | 7 |
| 16 | Matara | 138 | 200 | 163 | 107 | 593 | 297 | 0 | 26 | 131 | 2 | 13 | 0 | 1 |
| 17 | Monaragala | 64 | 104 | 162 | 58 | 389 | 197 | 1 | 21 | 47 | 0 | 27 | 0 | 2 |
| 18 | Mulathivu | 1 | 6 | 2 | 9 | 5 | 0 | 0 | 0 | 3 | 0 | 1 | 0 | 0 |
| 19 | Nuwara-Eliya (Nuwara-Eliya & Hatton Divisions) | 80 | 136 | 130 | 12 | 89 | 247 | 4 | 32 | 83 | 1 | 6 | 0 | 14 |
| 20 | Polonnaruwa | 36 | 92 | 136 | 63 | 348 | 95 | 1 | 18 | 31 | 1 | 36 | 0 | 0 |
| 21 | Ratnapura | 182 | 234 | 321 | 42 | 553 | 443 | 9 | 38 | 155 | 2 | 8 | 0 | 0 |
| 22 | Tangalle (Tangalle Division) | 59 | 236 | 148 | 66 | 480 | 110 | 11 | 34 | 70 | 0 | 36 | 0 | 2 |
| 23 | Trincomalee (Trincomalee & Kantale Divisions) | 23 | 72 | 78 | 63 | 286 | 129 | 2 | 12 | 30 | 0 | 27 | 3 | 4 |
| 24 | Vavuniya | 4 | 15 | 32 | 26 | 66 | 20 | 0 | 3 | 3 | 0 | 2 | 0 | 0 |
data.head()
| Location | Motor Car | Dual Purpose Vehicle | Lorry | Cycle | Motor Cycle/Moped | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Ampara | 59 | 102 | 121 | 93 | 575 | 169 | 4 | 15 | 35 | 1 | 41 | 4 | 0 |
| 1 | Anuradhapura | 116 | 224 | 246 | 158 | 845 | 196 | 2 | 28 | 104 | 3 | 72 | 0 | 6 |
| 2 | Badulla (Badulla & Bandarawela Divisions) | 82 | 121 | 171 | 20 | 266 | 219 | 2 | 58 | 78 | 1 | 16 | 0 | 5 |
| 3 | Batticaloa | 26 | 43 | 30 | 64 | 224 | 52 | 4 | 9 | 25 | 1 | 24 | 1 | 2 |
| 4 | Chilaw (Chilaw & Puttalam Divisions) | 102 | 213 | 224 | 134 | 530 | 163 | 14 | 23 | 84 | 1 | 19 | 1 | 3 |
data.tail()
| Location | Motor Car | Dual Purpose Vehicle | Lorry | Cycle | Motor Cycle/Moped | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20 | Polonnaruwa | 36 | 92 | 136 | 63 | 348 | 95 | 1 | 18 | 31 | 1 | 36 | 0 | 0 |
| 21 | Ratnapura | 182 | 234 | 321 | 42 | 553 | 443 | 9 | 38 | 155 | 2 | 8 | 0 | 0 |
| 22 | Tangalle (Tangalle Division) | 59 | 236 | 148 | 66 | 480 | 110 | 11 | 34 | 70 | 0 | 36 | 0 | 2 |
| 23 | Trincomalee (Trincomalee & Kantale Divisions) | 23 | 72 | 78 | 63 | 286 | 129 | 2 | 12 | 30 | 0 | 27 | 3 | 4 |
| 24 | Vavuniya | 4 | 15 | 32 | 26 | 66 | 20 | 0 | 3 | 3 | 0 | 2 | 0 | 0 |
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 25 entries, 0 to 24 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Location 25 non-null object 1 Motor Car 25 non-null int64 2 Dual Purpose Vehicle 25 non-null int64 3 Lorry 25 non-null int64 4 Cycle 25 non-null int64 5 Motor Cycle/Moped 25 non-null int64 6 Three wheeler 25 non-null int64 7 Articulated Vehicle, prime mover 25 non-null int64 8 SLT Bus 25 non-null int64 9 Private Bus 25 non-null int64 10 Intercity Bus 25 non-null int64 11 Land Vehicle/Tractor 25 non-null int64 12 Animal drawn vehicle or rider on animal 25 non-null int64 13 Other 25 non-null int64 dtypes: int64(13), object(1) memory usage: 2.9+ KB
data.describe()
| Motor Car | Dual Purpose Vehicle | Lorry | Cycle | Motor Cycle/Moped | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | Other | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 | 25.000000 |
| mean | 403.320000 | 347.560000 | 300.120000 | 93.160000 | 657.320000 | 400.400000 | 16.520000 | 46.520000 | 166.040000 | 1.800000 | 23.040000 | 0.560000 | 7.120000 |
| std | 987.465979 | 589.583474 | 396.740818 | 83.097673 | 718.236714 | 597.698224 | 39.428332 | 48.523293 | 254.023536 | 2.101587 | 17.350504 | 1.227464 | 12.839782 |
| min | 1.000000 | 6.000000 | 2.000000 | 9.000000 | 5.000000 | 0.000000 | 0.000000 | 0.000000 | 3.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 |
| 25% | 32.000000 | 74.000000 | 93.000000 | 29.000000 | 224.000000 | 95.000000 | 1.000000 | 15.000000 | 31.000000 | 0.000000 | 10.000000 | 0.000000 | 0.000000 |
| 50% | 82.000000 | 192.000000 | 163.000000 | 64.000000 | 480.000000 | 197.000000 | 4.000000 | 28.000000 | 83.000000 | 1.000000 | 16.000000 | 0.000000 | 3.000000 |
| 75% | 281.000000 | 323.000000 | 337.000000 | 134.000000 | 746.000000 | 443.000000 | 11.000000 | 58.000000 | 218.000000 | 3.000000 | 35.000000 | 0.000000 | 7.000000 |
| max | 4708.000000 | 2675.000000 | 1686.000000 | 317.000000 | 2835.000000 | 2804.000000 | 175.000000 | 190.000000 | 1194.000000 | 9.000000 | 72.000000 | 4.000000 | 63.000000 |
data.isnull().sum()
Location 0 Motor Car 0 Dual Purpose Vehicle 0 Lorry 0 Cycle 0 Motor Cycle/Moped 0 Three wheeler 0 Articulated Vehicle, prime mover 0 SLT Bus 0 Private Bus 0 Intercity Bus 0 Land Vehicle/Tractor 0 Animal drawn vehicle or rider on animal 0 Other 0 dtype: int64
data.duplicated().sum()
0
data.shape
(25, 14)
#VISUALIZATION
plt.scatter(data['Cycle'],data['SLT Bus'])
plt.xticks(rotation=90)
plt.show()
fig=px.bar(data,x='Lorry',y='Location',color='Lorry')
fig.show()
fig=px.violin(data,x='Intercity Bus',y='SLT Bus',color='Intercity Bus')
fig.show()
fig=px.bar(data,x='Other',y='Land Vehicle/Tractor',color='Land Vehicle/Tractor')
fig.show()
plt.scatter(data['Dual Purpose Vehicle'],data['Cycle'])
plt.bar(data['Motor Car'],data['SLT Bus'],color='red')
plt.xticks(rotation=90)
plt.show()
plt.figure(figsize=(10,4))
sns.countplot(x='Animal drawn vehicle or rider on animal', data=data, color='cyan')
plt.title('Animal drawn vehicle v/s rider on animal')
plt.show()
plt.figure(figsize=(10,4))
top_car = data['Other'].value_counts().nlargest(10)
sns.countplot(y=data.Other,color='green')
<AxesSubplot:xlabel='count', ylabel='Other'>
sns.lineplot(x='Cycle', y='Articulated Vehicle, prime mover', data=data).set_title('Variation of Cycle with Articulated Vehicle, prime mover')
Text(0.5, 1.0, 'Variation of Cycle with Articulated Vehicle, prime mover')
sns.barplot(data['Land Vehicle/Tractor'],data['Motor Cycle/Moped'],color='r')
plt.xticks(rotation=90)
plt.show()
plt.figure(figsize=(8, 4))
sns.scatterplot(data=data, x='Animal drawn vehicle or rider on animal', y='Location')
plt.title('Animal drawn vehicle or rider on animalin the Location')
plt.xlabel('Animal drawn vehicle or rider on animal')
plt.ylabel('Location')
plt.show()
sns.displot(data["Dual Purpose Vehicle"])
<seaborn.axisgrid.FacetGrid at 0x1b0ce0c3fa0>
sns.relplot(x='Lorry',y='Private Bus',data=data)
<seaborn.axisgrid.FacetGrid at 0x1b0ce17ceb0>
sns.countplot(x='Land Vehicle/Tractor',data=data)
plt.xticks(rotation=90)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18]),
[Text(0, 0, '1'),
Text(1, 0, '2'),
Text(2, 0, '6'),
Text(3, 0, '7'),
Text(4, 0, '8'),
Text(5, 0, '10'),
Text(6, 0, '13'),
Text(7, 0, '15'),
Text(8, 0, '16'),
Text(9, 0, '19'),
Text(10, 0, '24'),
Text(11, 0, '27'),
Text(12, 0, '28'),
Text(13, 0, '35'),
Text(14, 0, '36'),
Text(15, 0, '38'),
Text(16, 0, '41'),
Text(17, 0, '59'),
Text(18, 0, '72')])
sns.boxplot(x='Land Vehicle/Tractor',y='Cycle',data=data)
<AxesSubplot:xlabel='Land Vehicle/Tractor', ylabel='Cycle'>
sns.violinplot(x='Intercity Bus',y='Animal drawn vehicle or rider on animal',data=data)
<AxesSubplot:xlabel='Intercity Bus', ylabel='Animal drawn vehicle or rider on animal'>
#MODEL BUILDING
data['months']=2024-data['Other']
data.drop('Other',axis=1,inplace=True)
data.head()
| Location | Motor Car | Dual Purpose Vehicle | Lorry | Cycle | Motor Cycle/Moped | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | months | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Ampara | 59 | 102 | 121 | 93 | 575 | 169 | 4 | 15 | 35 | 1 | 41 | 4 | 2024 |
| 1 | Anuradhapura | 116 | 224 | 246 | 158 | 845 | 196 | 2 | 28 | 104 | 3 | 72 | 0 | 2018 |
| 2 | Badulla (Badulla & Bandarawela Divisions) | 82 | 121 | 171 | 20 | 266 | 219 | 2 | 58 | 78 | 1 | 16 | 0 | 2019 |
| 3 | Batticaloa | 26 | 43 | 30 | 64 | 224 | 52 | 4 | 9 | 25 | 1 | 24 | 1 | 2022 |
| 4 | Chilaw (Chilaw & Puttalam Divisions) | 102 | 213 | 224 | 134 | 530 | 163 | 14 | 23 | 84 | 1 | 19 | 1 | 2021 |
data.rename(columns={'Motor Car':'car', 'Dual Purpose Vehicle':'bike', 'Lorry':'6 wheeler', 'Cycle':'bycycle'},inplace=True)
data.columns
Index(['Location', 'car', 'bike', '6 wheeler', 'bycycle', 'Motor Cycle/Moped',
'Three wheeler', 'Articulated Vehicle, prime mover', 'SLT Bus',
'Private Bus', 'Intercity Bus', 'Land Vehicle/Tractor',
'Animal drawn vehicle or rider on animal', 'months'],
dtype='object')
cat_cols=['6 wheeler','Intercity Bus','SLT Bus','Three wheeler']
i=0
while i<4:
fig=plt.figure(figsize=[10,6])
plt.subplot(1,2,1)
sns.countplot(x=cat_cols[i],data=data)
plt.xticks(rotation=90)
i += 1
plt.subplot(1,2,2)
sns.countplot(x=cat_cols[i],data=data)
i += 1
plt.xticks(rotation=90)
plt.show()
num_cols=['Motor Cycle/Moped','bike','Land Vehicle/Tractor','Private Bus']
i=0
while i<4:
fig=plt.figure(figsize=[13,3])
plt.subplot(1,2,1)
sns.violinplot(x=num_cols[i],data=data)
i += 1
plt.subplot(1,2,2)
sns.violinplot(x=num_cols[i],data=data)
i += 1
plt.show()
data.drop(labels = 'Location', axis = 1, inplace = True)
data = pd.get_dummies(data=data, drop_first = True)
sns.heatmap(data.corr(), annot=True, cmap="RdBu")
plt.show()
data.corr()['Motor Cycle/Moped']
car 0.862189 bike 0.920462 6 wheeler 0.951255 bycycle 0.867273 Motor Cycle/Moped 1.000000 Three wheeler 0.895465 Articulated Vehicle, prime mover 0.868628 SLT Bus 0.821950 Private Bus 0.893898 Intercity Bus 0.832529 Land Vehicle/Tractor 0.550981 Animal drawn vehicle or rider on animal 0.276366 months -0.694734 Name: Motor Cycle/Moped, dtype: float64
x=data.drop('Motor Cycle/Moped', axis=1)
y=data['Animal drawn vehicle or rider on animal']
x.head()
| car | bike | 6 wheeler | bycycle | Three wheeler | Articulated Vehicle, prime mover | SLT Bus | Private Bus | Intercity Bus | Land Vehicle/Tractor | Animal drawn vehicle or rider on animal | months | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 59 | 102 | 121 | 93 | 169 | 4 | 15 | 35 | 1 | 41 | 4 | 2024 |
| 1 | 116 | 224 | 246 | 158 | 196 | 2 | 28 | 104 | 3 | 72 | 0 | 2018 |
| 2 | 82 | 121 | 171 | 20 | 219 | 2 | 58 | 78 | 1 | 16 | 0 | 2019 |
| 3 | 26 | 43 | 30 | 64 | 52 | 4 | 9 | 25 | 1 | 24 | 1 | 2022 |
| 4 | 102 | 213 | 224 | 134 | 163 | 14 | 23 | 84 | 1 | 19 | 1 | 2021 |
y.head()
0 4 1 0 2 0 3 1 4 1 Name: Animal drawn vehicle or rider on animal, dtype: int64
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
x_train,x_test,y_train,y_test=train_test_split(x, y, test_size=0.2,random_state=42)
print("x train: ",x_train.shape)
print("x test: ",x_test.shape)
print("y train: ",y_train.shape)
print("y test: ",y_test.shape)
x train: (20, 12) x test: (5, 12) y train: (20,) y test: (5,)
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print("coefficients:",model.coef_)
print("intercept:", model.intercept_)
coefficients: [ 9.29927312e-19 -1.00516561e-17 -1.31644212e-18 -3.10400721e-18 -3.01568310e-18 8.58355465e-17 -1.43662948e-17 2.61252411e-17 -1.63154583e-17 1.63796059e-17 1.00000000e+00 -8.71101313e-17] intercept: 1.7524870443708096e-13
from sklearn.metrics import mean_squared_error
mse=mean_squared_error(y_test,y_pred)
print("mean squared error:",mse)
mean squared error: 3.511108096713696e-30
error=y_test,-y_pred
print(error)
(8 0
16 0
0 4
23 3
11 0
Name: Animal drawn vehicle or rider on animal, dtype: int64, array([ 1.17472226e-15, 1.07027222e-15, -4.00000000e+00, -3.00000000e+00,
-6.60423143e-16]))
import seaborn as sns
import matplotlib.pyplot as plt
error=y_test-y_pred
data=pd.DataFrame({'y_pred':y_pred, 'error': error})
sns.regplot(x='y_pred', y='error', data=data)
plt.show()